{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torchvision\n",
    "import torch.optim as optim\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "from torch.utils.data import DataLoader\n",
    "from train_densenet import *\n",
    "from data_utils import *\n",
    "from math import ceil"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DenseNet(\n",
       "  (features): Sequential(\n",
       "    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n",
       "    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "    (relu0): ReLU(inplace=True)\n",
       "    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n",
       "    (denseblock1): _DenseBlock(\n",
       "      (denselayer1): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer2): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(96, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer3): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer4): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(160, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer5): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(192, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer6): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(224, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "    )\n",
       "    (transition1): _Transition(\n",
       "      (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "      (relu): ReLU(inplace=True)\n",
       "      (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "      (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)\n",
       "    )\n",
       "    (denseblock2): _DenseBlock(\n",
       "      (denselayer1): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer2): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(160, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer3): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(192, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer4): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(224, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer5): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer6): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(288, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(288, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer7): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(320, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer8): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(352, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(352, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer9): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer10): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(416, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(416, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer11): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(448, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(448, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer12): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(480, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "    )\n",
       "    (transition2): _Transition(\n",
       "      (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "      (relu): ReLU(inplace=True)\n",
       "      (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "      (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)\n",
       "    )\n",
       "    (denseblock3): _DenseBlock(\n",
       "      (denselayer1): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer2): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(288, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(288, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer3): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(320, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer4): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(352, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(352, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer5): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer6): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(416, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(416, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer7): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(448, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(448, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer8): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(480, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer9): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer10): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(544, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(544, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer11): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(576, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer12): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(608, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(608, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer13): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(640, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer14): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(672, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer15): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(704, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(704, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer16): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(736, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(736, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer17): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(768, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer18): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(800, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(800, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer19): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(832, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(832, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer20): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(864, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(864, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer21): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(896, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(896, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer22): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(928, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(928, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer23): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(960, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer24): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(992, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(992, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "    )\n",
       "    (transition3): _Transition(\n",
       "      (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "      (relu): ReLU(inplace=True)\n",
       "      (conv): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "      (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)\n",
       "    )\n",
       "    (denseblock4): _DenseBlock(\n",
       "      (denselayer1): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer2): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(544, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(544, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer3): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(576, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer4): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(608, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(608, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer5): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(640, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer6): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(672, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer7): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(704, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(704, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer8): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(736, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(736, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer9): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(768, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer10): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(800, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(800, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer11): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(832, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(832, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer12): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(864, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(864, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer13): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(896, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(896, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer14): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(928, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(928, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer15): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(960, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer16): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(992, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(992, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "    )\n",
       "    (norm5): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "  )\n",
       "  (classifier): Linear(in_features=1024, out_features=1000, bias=True)\n",
       ")"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "torch.backends.cudnn.benchmark = True\n",
    "densenet = torchvision.models.densenet121(pretrained=True)\n",
    "densenet.cuda()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "densenet.classifier.out_features = 257"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# def weights_init(m):\n",
    "#     \"\"\"\n",
    "#     用于初始化模型的权值和偏置\n",
    "#     \"\"\"\n",
    "#     classname = m.__class__.__name__\n",
    "#     if classname.find('Conv') != -1:\n",
    "#         m.weight.data.normal_(0.0, 0.02)\n",
    "#     elif classname.find('BatchNorm') != -1:\n",
    "#         m.weight.data.normal_(1.0, 0.02)\n",
    "#         m.bias.data.fill_(0)\n",
    "#     elif classname.find('Linear') != -1:\n",
    "#         m.weight.data.normal_(0.0, 0.02)\n",
    "#         if m.bias is not None:\n",
    "#             m.bias.data.fill_(0.0)\n",
    "\n",
    "# densenet.apply(weights_init)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "param = []\n",
    "for p in densenet.parameters():\n",
    "    if p.requires_grad == True:\n",
    "        param.append(p)\n",
    "\n",
    "optimizer = optim.Adam(param,\n",
    "                       lr=5e-4,\n",
    "                       betas=(0.5, 0.999))\n",
    "\n",
    "criterion = nn.CrossEntropyLoss()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "24387\n"
     ]
    }
   ],
   "source": [
    "batch_size = 16\n",
    "train_folder, val_folder = get_folders()\n",
    "\n",
    "train_iterator = DataLoader(\n",
    "    train_folder, batch_size=batch_size, num_workers=0,\n",
    "    shuffle=True, pin_memory=False\n",
    ")\n",
    "\n",
    "val_iterator = DataLoader(\n",
    "    val_folder, batch_size=16, num_workers=0,\n",
    "    shuffle=False, pin_memory=False\n",
    ")\n",
    "\n",
    "# number of training samples\n",
    "train_size = len(train_folder.imgs)\n",
    "print(train_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1525\n"
     ]
    }
   ],
   "source": [
    "n_epochs = 50\n",
    "n_batches = ceil(train_size/batch_size)\n",
    "print(n_batches)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "validation_step: 0.50  avg_train_loss: 4.398  avg_val_loss: 3.669  avg_train_acc: 0.188  avg_val_acc: 0.227  time_per_val_step: 210.349\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-8-e31d28526316>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      3\u001b[0m     \u001b[0mtrain_iterator\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mn_epochs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mn_batches\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      4\u001b[0m     \u001b[0mval_iterator\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalidation_step\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m763\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mn_validation_batches\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m80\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 5\u001b[1;33m     \u001b[0msaving_epoch\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m5\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlr_scheduler\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      6\u001b[0m )\n",
      "\u001b[1;32md:\\A-DATA_STORAGE\\大三上学期\\CV\\实验5\\train_densenet.py\u001b[0m in \u001b[0;36mtrain\u001b[1;34m(model, criterion, optimizer, train_iterator, n_epochs, n_batches, val_iterator, validation_step, n_validation_batches, saving_epoch, lr_scheduler)\u001b[0m\n\u001b[0;32m    113\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    114\u001b[0m             batch_loss, batch_accuracy = optimization_step(\n\u001b[1;32m--> 115\u001b[1;33m                 \u001b[0mmodel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcriterion\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx_batch\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_batch\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    116\u001b[0m             )\n\u001b[0;32m    117\u001b[0m             \u001b[0mrunning_loss\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[0mbatch_loss\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32md:\\A-DATA_STORAGE\\大三上学期\\CV\\实验5\\train_densenet.py\u001b[0m in \u001b[0;36moptimization_step\u001b[1;34m(model, criterion, optimizer, x_batch, y_batch)\u001b[0m\n\u001b[0;32m     64\u001b[0m     \u001b[1;31m# compute logloss\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     65\u001b[0m     \u001b[0mloss\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcriterion\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlogits\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_batch\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 66\u001b[1;33m     \u001b[0mloss_cpu\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mloss\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     67\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     68\u001b[0m     \u001b[1;31m# print(\"loss shape: \", loss_cpu.data)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "all_losses = train(\n",
    "    densenet, criterion, optimizer, \n",
    "    train_iterator, n_epochs, n_batches, \n",
    "    val_iterator, validation_step=763, n_validation_batches=80, \n",
    "    saving_epoch=5, lr_scheduler=None\n",
    ")\n"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "494899efd6527d56ea7f55c588d0081523a17dc3a9ff1107f3394ad815ff2527"
  },
  "kernelspec": {
   "display_name": "Python 3.7.7 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
